Reading data “income per person” through GitHub
# Load the income-per-person table directly from its raw GitHub URL.
data1 <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/income_per_person.csv"
)
# Optional sanity checks on the freshly loaded table:
# head(data1)
# tail(data1)
# The download from GitHub was confirmed to work.
Reshaping data-1 to given conditions
# Stack every column except `geo` into long format: one row per
# (geo, Year) pair, discarding rows whose Income is NA.
reshdat <- gather(data1, key = "Year", value = "Income", -geo, na.rm = TRUE)
# Characters 2-5 of each former column name become `year`; the original
# `Year` column is then dropped.
reshdat2 <- select(mutate(reshdat, year = substr(Year, 2, 5)), -Year)
# Expose the country identifier under the name `Country`.
dataready <- reshdat2 %>%
  rename(Country = geo)
# Preview the first rows of the reshaped income table.
head(dataready)
## Country Income year
## 1 Afghanistan 603 1800
## 2 Albania 667 1800
## 3 Algeria 715 1800
## 4 Andorra 1200 1800
## 5 Angola 618 1800
## 6 Antigua and Barbuda 757 1800
# Preview the last six rows of the reshaped income table.
tail(dataready, n = 6L)
## Country Income year
## 42262 Vanuatu 2900 2018
## 42263 Venezuela 14200 2018
## 42264 Vietnam 6550 2018
## 42265 Yemen 2430 2018
## 42266 Zambia 3870 2018
## 42267 Zimbabwe 1950 2018
Reading data “life expectancy” through GitHub
# Load the life-expectancy table directly from its raw GitHub URL.
data2 <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/life_expectancy_years.csv"
)
# Optional sanity check on the freshly loaded table:
# head(data2)
# The download from GitHub was confirmed to work.
Reshaping data-2 based on given conditions
# Stack every column except `geo` into long format: one row per
# (geo, Year) pair, discarding rows whose life expectancy is NA.
lifeexp <- gather(data2, key = "Year", value = "Life Exp.", -geo, na.rm = TRUE)
# Characters 2-5 of each former column name become `year`; the original
# `Year` column is then dropped.
lifeexp2 <- select(mutate(lifeexp, year = substr(Year, 2, 5)), -Year)
# Expose the country identifier under the name `Country`.
dataready2 <- lifeexp2 %>%
  rename(Country = geo)
# Preview the first rows of the reshaped life-expectancy table.
head(dataready2)
## Country Life Exp. year
## 1 Afghanistan 28.2 1800
## 2 Albania 35.4 1800
## 3 Algeria 28.8 1800
## 5 Angola 27.0 1800
## 6 Antigua and Barbuda 33.5 1800
## 7 Argentina 33.2 1800
# Preview the last six rows of the reshaped life-expectancy table.
tail(dataready2, n = 6L)
## Country Life Exp. year
## 40948 Vanuatu 64.3 2018
## 40949 Venezuela 75.9 2018
## 40950 Vietnam 74.9 2018
## 40951 Yemen 67.1 2018
## 40952 Zambia 59.5 2018
## 40953 Zimbabwe 60.2 2018
Reading data “population total” through GitHub and reshaping data-3 based on given conditions
# Load the total-population table directly from its raw GitHub URL.
data3 <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/population_total.csv"
)
# head(data3)
# Stack every column except `geo` into long format: one row per
# (geo, Year) pair, discarding rows whose population is NA.
popdata <- gather(
  data3,
  key = "Year",
  value = "Population_size",
  -geo,
  na.rm = TRUE
)
# Characters 2-5 of each former column name become `year`; the original
# `Year` column is then dropped.
popdata2 <- select(mutate(popdata, year = substr(Year, 2, 5)), -Year)
# Expose the country identifier under the name `Country`.
dataready3 <- popdata2 %>%
  rename(Country = geo)
# Preview the first rows of the reshaped population table.
head(dataready3)
## Country Population_size year
## 1 Afghanistan 3280000 1800
## 2 Albania 410000 1800
## 3 Algeria 2500000 1800
## 4 Andorra 2650 1800
## 5 Angola 1570000 1800
## 6 Antigua and Barbuda 37000 1800
Merging three datasets by Country and Year
# Join income with life expectancy on the shared Country/year keys.
# merge() with its default settings keeps only keys found in both inputs.
datacombo <- merge(x = dataready, y = dataready2, by = c("Country", "year"))
# head(datacombo)
# Add the population column using the same pair of keys.
datacombo2 <- merge(x = datacombo, y = dataready3, by = c("Country", "year"))
# Preview the combined table.
head(datacombo2)
## Country year Income Life Exp. Population_size
## 1 Afghanistan 1800 603 28.2 3280000
## 2 Afghanistan 1801 603 28.2 3280000
## 3 Afghanistan 1802 603 28.2 3280000
## 4 Afghanistan 1803 603 28.2 3280000
## 5 Afghanistan 1804 603 28.2 3280000
## 6 Afghanistan 1805 603 28.2 3280000
Output data frame restricted to the year 2015
# Keep only the rows whose year is "2015" (year is stored as text) and the
# listed columns, in the order they already appear in datacombo2.
datacombo2015 <- datacombo2[
  which(datacombo2[["year"]] == "2015"),
  is.element(
    names(datacombo2),
    c("Country", "Income", "Life Exp.", "year", "Population_size")
  )
]
# Optional previews of the 2015 subset:
# head(datacombo2015)
# tail(datacombo2015)
RStudio-based scatter plot with "hover text" / pop-ups
# Apply the shared figure styling (size, title, legend, margins, background,
# axis labels and one annotation) to a plotly figure.
#
# Args:
#   p: the plotly figure to style, e.g. the result of plot_ly(). layout()
#      modifies an existing figure, so one must be supplied, either directly
#      or through a pipe: plot_ly(...) %>% myPlotlyLayout().
#
# Returns:
#   Whatever layout() returns for `p` with the settings below applied.
myPlotlyLayout <- function(p) {
  layout(
    p,
    ## graphic size
    ## (the first argument was previously misspelled `with`, so no width
    ## was ever set)
    ## NOTE(review): newer plotly releases may prefer width/height to be set
    ## in plot_ly() rather than layout() - confirm against the installed
    ## version.
    width = 700,
    height = 700,
    ### Title
    title = list(
      text = "Income vs Life Expectancy in 2015",
      font = list(
        family = "Times New Roman", # HTML font family
        size = 18,
        color = "red"
      )
    ),
    ### legend
    legend = list(
      title = list(
        text = "Country",
        font = list(
          family = "Courier New",
          size = 14,
          color = "green"
        )
      ),
      bgcolor = "ivory",
      bordercolor = "navy",
      groupclick = "togglegroup", # one of "toggleitem" or "togglegroup"
      orientation = "v" # sets the orientation of the legend
    ),
    ## margin of the plot
    margin = list(
      b = 120,
      l = 50,
      t = 120,
      r = 50
    ),
    ## Background
    plot_bgcolor = "#f7f7f7",
    ## Axes labels
    xaxis = list(
      title = list(
        text = "Life Expectancy",
        font = list(family = "Arial")
      ),
      zerolinecolor = "red",
      zerolinewidth = 2,
      gridcolor = "white"
    ),
    yaxis = list(
      title = list(
        text = "Income",
        font = list(family = "Arial")
      ),
      zerolinecolor = "purple",
      zerolinewidth = 2,
      gridcolor = "white"
    ),
    ## annotations
    annotations = list(
      x = 0.7, # between 0 and 1: 0 = left, 1 = right
      y = 0.9, # between 0 and 1: 0 = bottom, 1 = top
      font = list(
        size = 12,
        color = "darkred"
      ),
      text = "The point size is proportional to Population",
      # "container" spans the entire width of the plot;
      # "paper" refers to the width of the plotting area only.
      xref = "paper",
      yref = "paper", # same convention as xref
      xanchor = "center", # horizontal alignment relative to the x position
      yanchor = "bottom", # vertical alignment relative to the y position
      showarrow = FALSE
    )
  )
}
# Pull the plotted columns into standalone vectors. The formulas passed to
# plot_ly() below (~Life_Expectancy, ~Income, ~country, ~popsize) refer to
# these names rather than to columns of datacombo2015.
Life_Expectancy <- datacombo2015$`Life Exp.`
Income <- datacombo2015$Income
country <- datacombo2015$Country
year <- datacombo2015$year
popsize <- datacombo2015$Population_size
# Scatter plot of income against life expectancy for 2015, with marker size
# driven by population and one colour per country.
plot_ly(
  data = datacombo2015,
  x = ~Life_Expectancy, # Horizontal axis
  y = ~Income, # Vertical axis
  color = ~factor(country), # colour grouping: one level per country
  text = ~country, # country name attached to each point
  ## The hovertemplate adds the two numerical variables to the hover text.
  ## The bold tag around "Income" is now closed properly (it was previously
  ## opened twice and never closed).
  hovertemplate = paste('<i><b>Income</b></i>: %{y}',
                        '<br><b>Life_Expectancy</b>: %{x}',
                        '<br><b></b>'),
  alpha = 0.9,
  size = ~popsize,
  type = "scatter",
  mode = "markers"
) %>%
  # The title is a layout property, not a scatter-trace attribute, so it is
  # set through layout() instead of being passed to plot_ly().
  layout(title = "Income vs Life Expectancy in 2015")
Saving merged datasets to create better Tableau visualization
# Export the merged table (all years) as CSV, keeping row names.
# The path is assembled with file.path() so the directory and file name are
# joined with a proper separator; the previous literal mixed "/" with an
# escaped backslash, which on this path ends up inside the file name
# instead of separating the WCU folder from the file.
write.csv(
  datacombo2,
  file.path("/Users/bobbyshakirov/Desktop/WCU", "datacombo3.csv"),
  row.names = TRUE
)
#install.packages("writexl")
library("writexl")
#write_xlsx(datacombo2, "/Users/bobbyshakirov/Library/CloudStorage/OneDrive-WestChesterUniversityofPA/STA #553/ASSIGNEMNT\\combodataTableau")